To run this code in my project using the renv environment, run the following lines of code
install.packages("renv") #install the package on the new computer (may not be necessary if renv bootstraps itself as expected)
renv::restore() #reinstall all the package versions in the renv lockfile
library("ViSEAGO")
##
## Warning: replacing previous import 'data.table::set' by 'dendextend::set' when
## loading 'ViSEAGO'
## Warning: replacing previous import 'dendextend::cutree' by 'stats::cutree' when
## loading 'ViSEAGO'
## Warning: replacing previous import 'AnnotationDbi::tail' by 'utils::tail' when
## loading 'ViSEAGO'
require("topGO")
## Loading required package: topGO
## Loading required package: BiocGenerics
## Loading required package: generics
##
## Attaching package: 'generics'
## The following objects are masked from 'package:base':
##
## as.difftime, as.factor, as.ordered, intersect, is.element, setdiff,
## setequal, union
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, saveRDS, table, tapply, unique,
## unsplit, which.max, which.min
## Loading required package: graph
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: GO.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: SparseM
##
## groupGOTerms: GOBPTerm, GOMFTerm, GOCCTerm environments built.
##
## Attaching package: 'topGO'
## The following object is masked from 'package:IRanges':
##
## members
require("tidyverse")
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ ggplot2::annotate() masks ViSEAGO::annotate()
## ✖ stringr::boundary() masks graph::boundary()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::select() masks AnnotationDbi::select()
## ✖ dplyr::slice() masks IRanges::slice()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#BiocManager::install("GO.db")
sessionInfo() #provides list of loaded packages and version of R.
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.0.1
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices datasets utils methods
## [8] base
##
## other attached packages:
## [1] lubridate_1.9.4 forcats_1.0.1 stringr_1.6.0
## [4] dplyr_1.1.4 purrr_1.2.0 readr_2.1.5
## [7] tidyr_1.3.1 tibble_3.3.0 ggplot2_4.0.0
## [10] tidyverse_2.0.0 topGO_2.62.0 SparseM_1.84-2
## [13] GO.db_3.22.0 AnnotationDbi_1.72.0 IRanges_2.44.0
## [16] S4Vectors_0.48.0 Biobase_2.70.0 graph_1.88.0
## [19] BiocGenerics_0.56.0 generics_0.1.4 ViSEAGO_1.24.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 rstudioapi_0.17.1 jsonlite_2.0.0
## [4] shape_1.4.6.1 magrittr_2.0.4 farver_2.1.2
## [7] rmarkdown_2.30 GlobalOptions_0.1.2 fs_1.6.6
## [10] vctrs_0.6.5 memoise_2.0.1 RCurl_1.98-1.17
## [13] webshot_0.5.5 htmltools_0.5.8.1 progress_1.2.3
## [16] dynamicTreeCut_1.63-1 curl_7.0.0 sass_0.4.10
## [19] bslib_0.9.0 htmlwidgets_1.6.4 plyr_1.8.9
## [22] httr2_1.2.1 plotly_4.11.0 cachem_1.1.0
## [25] igraph_2.2.1 lifecycle_1.0.4 iterators_1.0.14
## [28] pkgconfig_2.0.3 Matrix_1.7-4 R6_2.6.1
## [31] fastmap_1.2.0 clue_0.3-66 digest_0.6.37
## [34] colorspace_2.1-2 RSQLite_2.4.3 seriation_1.5.8
## [37] filelock_1.0.3 timechange_0.3.0 httr_1.4.7
## [40] compiler_4.5.2 bit64_4.6.0-1 withr_3.0.2
## [43] doParallel_1.0.17 S7_0.2.0 BiocParallel_1.44.0
## [46] viridis_0.6.5 DBI_1.2.3 UpSetR_1.4.0
## [49] heatmaply_1.6.0 dendextend_1.19.1 R.utils_2.13.0
## [52] biomaRt_2.66.0 rappdirs_0.3.3 rjson_0.2.23
## [55] tools_4.5.2 R.oo_1.27.1 glue_1.8.0
## [58] DiagrammeR_1.0.11 GOSemSim_2.36.0 grid_4.5.2
## [61] cluster_2.1.8.1 fgsea_1.36.0 gtable_0.3.6
## [64] tzdb_0.5.0 R.methodsS3_1.8.2 ca_0.71.1
## [67] data.table_1.17.8 hms_1.1.4 XVector_0.50.0
## [70] foreach_1.5.2 pillar_1.11.1 yulab.utils_0.2.1
## [73] circlize_0.4.16 BiocFileCache_3.0.0 lattice_0.22-7
## [76] renv_1.1.5 bit_4.6.0 tidyselect_1.2.1
## [79] registry_0.5-1 ComplexHeatmap_2.26.0 Biostrings_2.78.0
## [82] knitr_1.50 gridExtra_2.3 Seqinfo_1.0.0
## [85] xfun_0.54 matrixStats_1.5.0 DT_0.34.0
## [88] visNetwork_2.1.4 stringi_1.8.7 lazyeval_0.2.2
## [91] yaml_2.3.10 evaluate_1.0.5 codetools_0.2-20
## [94] BiocManager_1.30.26 cli_3.6.5 jquerylib_0.1.4
## [97] dichromat_2.0-0.1 Rcpp_1.1.0 dbplyr_2.5.1
## [100] png_0.1-8 XML_3.99-0.19 parallel_4.5.2
## [103] assertthat_0.2.1 blob_1.2.4 prettyunits_1.2.0
## [106] AnnotationForge_1.52.0 bitops_1.0-9 viridisLite_0.4.2
## [109] scales_1.4.0 crayon_1.5.3 GetoptLong_1.0.5
## [112] rlang_1.1.6 cowplot_1.2.0 fastmatch_1.1-6
## [115] KEGGREST_1.50.0 TSP_1.2-5
I am going to perform functional enrichment of GO terms using ViSEAGO.
I am following this vignette: http://bioconductor.unipi.it/packages/devel/bioc/vignettes/ViSEAGO/inst/doc/ViSEAGO.html.
In the next chunk I am loading in my DESeq data. These results are ordered by adjusted p-value. As a reminder, negative LFC = higher in Oral tissue, and positive LFC = higher in Aboral tissue.
#load in DESeq results
DESeq <- read.csv("../output_RNA/differential_expression/DESeq_results.csv", header = TRUE) %>% dplyr::rename("query" ="X")
#make dataframes of just differentially expressed genes for each LFC direction - filtering a little more stringent, abs(LFC) >2
DE_05_Aboral <- DESeq %>% filter(padj < 0.05 & log2FoldChange > 1)
DE_05_OralEpi <- DESeq %>% filter(padj < 0.05& log2FoldChange < -1)
#load in annotation data
annot_tab <- read.delim("../references/annotation/protein-GO.tsv") %>% dplyr::rename(GOs = GeneOntologyIDs)
#filter annotation data for just expressed genes with GO annotations
annot_tab <- annot_tab %>% filter(query %in% DESeq$query)
annot_tab$GOs <- gsub("; ", ";", annot_tab$GOs)
annot_tab$GOs[annot_tab$GOs==""] <- NA
annot_tab <- annot_tab %>% filter(!is.na(GOs))
nrow(annot_tab)
## [1] 10638
nrow(annot_tab)/nrow(DESeq)
## [1] 0.7354812
10638/14464 genes in our dataset have GO information in this file. That is 74%.
sum(annot_tab$query %in% DE_05_Aboral$query)
## [1] 379
sum(annot_tab$query %in% DE_05_Aboral$query)/nrow(DE_05_Aboral)
## [1] 0.6792115
sum(annot_tab$query %in% DE_05_OralEpi$query)
## [1] 796
sum(annot_tab$query %in% DE_05_OralEpi$query)/nrow(DE_05_OralEpi)
## [1] 0.6546053
379/558 genes that are significantly upregulated in the Aboral tissue have annotation information. That is 68% of the genes.
796/1216 genes that are significantly upregulated in the Oral Epidermis tissue have annotation information. That is 71% of the genes.
##Get a list of GO Terms for all genes
annots <- annot_tab %>% dplyr::select(query,GOs,ProteinNames) %>% dplyr::rename("GO.terms" = GOs)
# format into the format required by ViSEAGO for custom mappings
Custom_GOs <- annots %>%
# Separate GO terms into individual rows
separate_rows(GO.terms, sep = ";") %>%
# Add necessary columns
mutate(
taxid = "pacuta",
gene_symbol = ProteinNames,
evidence = "SwissProt"
) %>%
# Rename columns
dplyr::rename(
gene_id = query,
GOID = GO.terms
) %>%
dplyr::select(taxid, gene_id, gene_symbol, GOID, evidence)
Custom_GOs_valid <- Custom_GOs %>% filter(GOID %in% keys(GO.db))
write.table(Custom_GOs_valid, "../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt",row.names = FALSE, sep = "\t", quote = FALSE,col.names=TRUE)
length(unique(Custom_GOs$gene_id))
## [1] 10638
length(unique(Custom_GOs_valid$gene_id))
## [1] 10637
We seem to have lost one gene when filtering for valid GO terms, so I need to account for that below.
Custom_Pacuta <- ViSEAGO::Custom2GO("../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt")
## 'select()' returned 1:1 mapping between keys and columns
myGENE2GO_Pacuta <- ViSEAGO::annotate(
id="pacuta",
Custom_Pacuta
)
selection <- DESeq %>% filter(query %in% Custom_GOs_valid$gene_id) %>%
mutate(DE_05_Aboral = ifelse(query %in% DE_05_Aboral$query, 1,0)) %>%
mutate(DE_05_Oral = ifelse(query %in% DE_05_OralEpi$query, 1,0)) %>%
mutate(expressed = 1)
selection_Aboral <- selection %>% pull(DE_05_Aboral) %>% as.factor()
names(selection_Aboral) <- selection %>% pull(query)
selection_Oral <- selection %>% pull(DE_05_Oral) %>% as.factor()
names(selection_Oral) <- selection %>% pull(query)
expressed <- selection %>% pull(expressed) %>% as.factor()
names(expressed) <- selection %>% pull(query)
# create viseago object
selection <- names(selection_Oral)[selection_Oral==1]
background <- names(expressed)
BP_Oral <- ViSEAGO::create_topGOdata(
geneSel=selection,
allGenes=background,
gene2GO=myGENE2GO_Pacuta,
ont="BP",
nodeSize=5
)
##
## Building most specific GOs .....
## ( 8797 GO terms found. )
##
## Build GO DAG topology ..........
## ( 12496 GO terms and 27095 relations. )
##
## Annotating nodes ...............
## ( 9496 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Oral <- topGO::runTest(
BP_Oral,
algorithm ="elim",
statistic = "fisher"
)
##
## -- Elim Algorithm --
##
## the algorithm is scoring 4392 nontrivial nodes
## parameters:
## test statistic: fisher
## cutOff: 0.01
##
## Level 19: 2 nodes to be scored (0 eliminated genes)
##
## Level 18: 2 nodes to be scored (0 eliminated genes)
##
## Level 17: 8 nodes to be scored (0 eliminated genes)
##
## Level 16: 18 nodes to be scored (0 eliminated genes)
##
## Level 15: 39 nodes to be scored (0 eliminated genes)
##
## Level 14: 42 nodes to be scored (61 eliminated genes)
##
## Level 13: 70 nodes to be scored (68 eliminated genes)
##
## Level 12: 156 nodes to be scored (96 eliminated genes)
##
## Level 11: 305 nodes to be scored (115 eliminated genes)
##
## Level 10: 460 nodes to be scored (323 eliminated genes)
##
## Level 9: 577 nodes to be scored (545 eliminated genes)
##
## Level 8: 635 nodes to be scored (630 eliminated genes)
##
## Level 7: 676 nodes to be scored (732 eliminated genes)
##
## Level 6: 625 nodes to be scored (1158 eliminated genes)
##
## Level 5: 439 nodes to be scored (1241 eliminated genes)
##
## Level 4: 242 nodes to be scored (1270 eliminated genes)
##
## Level 3: 79 nodes to be scored (1359 eliminated genes)
##
## Level 2: 16 nodes to be scored (1359 eliminated genes)
##
## Level 1: 1 nodes to be scored (1359 eliminated genes)
BP_Results <- ViSEAGO::merge_enrich_terms(
Input = list( Oral_elim = c("BP_Oral", "elim_Oral")))
## 'select()' returned 1:1 mapping between keys and columns
# display the merged table
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
## Oral_elim
## BP_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 796
## feasible_genes: 9496
## feasible_genes_significant: 687
## genes_nodeSize: 5
## nodes_number: 6675
## edges_number: 14047
## elim_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6675
## GO_significant: 60
## feasible_genes: 9496
## feasible_genes_significant: 687
## genes_nodeSize: 5
## Nontrivial_nodes: 4392
## - enrichment pvalue cutoff:
## Oral_elim : 0.01
## - enrich GOs (in at least one list): 60 GO terms of 1 conditions.
## Oral_elim : 60 terms
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
BP_Results,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral.csv"
)
# initialize
myGOs<-ViSEAGO::build_GO_SS(
gene2GO=myGENE2GO_Pacuta,
enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
myGOs,
distance=c("Wang")
)
myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Oral_elim
## BP_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 796
## feasible_genes: 9496
## feasible_genes_significant: 687
## genes_nodeSize: 5
## nodes_number: 6675
## edges_number: 14047
## elim_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6675
## GO_significant: 60
## feasible_genes: 9496
## feasible_genes_significant: 687
## genes_nodeSize: 5
## Nontrivial_nodes: 4392
## - enrichment pvalue cutoff:
## Oral_elim : 0.01
## - enrich GOs (in at least one list): 60 GO terms of 1 conditions.
## Oral_elim : 60 terms
## - terms distances: Wang
# display MDSplot
ViSEAGO::MDSplot(myGOs,
"GOterms")
# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Oral <- ViSEAGO::GOterms_heatmap(
myGOs,
showIC = TRUE,
showGOlabels = TRUE,
GO.tree = list(
tree = list(distance = "Wang", aggreg.method = "ward.D2"),
cut = list(
dynamic = list(
pamStage = TRUE,
pamRespectsDendro = TRUE,
deepSplit = 2,
minClusterSize = 2
)
)
),
samples.tree = NULL
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the dendextend package.
## Please report the issue at <https://github.com/talgalili/dendextend/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Oral,
"GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Oral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
Wang_clusters_wardD2_Oral,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Oral_cluster_heatmap_Wang_wardD2.csv"
)
# display colored MDSplot
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Oral,
"GOterms")
# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Oral<-ViSEAGO::compute_SS_distances(
Wang_clusters_wardD2_Oral,
distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned 1:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Oral,
"GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2_Oral <-ViSEAGO::GOclusters_heatmap(
Wang_clusters_wardD2_Oral,
tree=list(
distance="BMA",
aggreg.method="ward.D2"
)
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Oral,
"GOclusters")
# create viseago object
selection <- names(selection_Aboral)[selection_Aboral==1]
background <- names(expressed)
BP_Aboral <- ViSEAGO::create_topGOdata(
geneSel=selection,
allGenes=background,
gene2GO=myGENE2GO_Pacuta,
ont="BP",
nodeSize=5
)
##
## Building most specific GOs .....
## ( 8797 GO terms found. )
##
## Build GO DAG topology ..........
## ( 12496 GO terms and 27095 relations. )
##
## Annotating nodes ...............
## ( 9496 genes annotated to the GO terms. )
# perform TopGO test using elim algorithm
elim_Aboral <- topGO::runTest(
BP_Aboral,
algorithm ="elim",
statistic = "fisher"
)
##
## -- Elim Algorithm --
##
## the algorithm is scoring 3092 nontrivial nodes
## parameters:
## test statistic: fisher
## cutOff: 0.01
##
## Level 17: 1 nodes to be scored (0 eliminated genes)
##
## Level 16: 6 nodes to be scored (0 eliminated genes)
##
## Level 15: 21 nodes to be scored (0 eliminated genes)
##
## Level 14: 24 nodes to be scored (0 eliminated genes)
##
## Level 13: 44 nodes to be scored (175 eliminated genes)
##
## Level 12: 85 nodes to be scored (236 eliminated genes)
##
## Level 11: 184 nodes to be scored (238 eliminated genes)
##
## Level 10: 284 nodes to be scored (291 eliminated genes)
##
## Level 9: 377 nodes to be scored (334 eliminated genes)
##
## Level 8: 450 nodes to be scored (585 eliminated genes)
##
## Level 7: 472 nodes to be scored (824 eliminated genes)
##
## Level 6: 489 nodes to be scored (1100 eliminated genes)
##
## Level 5: 366 nodes to be scored (1905 eliminated genes)
##
## Level 4: 194 nodes to be scored (2346 eliminated genes)
##
## Level 3: 78 nodes to be scored (2420 eliminated genes)
##
## Level 2: 16 nodes to be scored (2578 eliminated genes)
##
## Level 1: 1 nodes to be scored (2578 eliminated genes)
BP_Results <- ViSEAGO::merge_enrich_terms(
Input = list(Aboral_elim = c("BP_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns
# display the merged table
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
## Aboral_elim
## BP_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 379
## feasible_genes: 9496
## feasible_genes_significant: 335
## genes_nodeSize: 5
## nodes_number: 6675
## edges_number: 14047
## elim_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6675
## GO_significant: 118
## feasible_genes: 9496
## feasible_genes_significant: 335
## genes_nodeSize: 5
## Nontrivial_nodes: 3092
## - enrichment pvalue cutoff:
## Aboral_elim : 0.01
## - enrich GOs (in at least one list): 118 GO terms of 1 conditions.
## Aboral_elim : 118 terms
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
BP_Results,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral.csv"
)
# initialize
myGOs<-ViSEAGO::build_GO_SS(
gene2GO=myGENE2GO_Pacuta,
enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
myGOs,
distance=c("Wang")
)
myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Aboral_elim
## BP_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 379
## feasible_genes: 9496
## feasible_genes_significant: 335
## genes_nodeSize: 5
## nodes_number: 6675
## edges_number: 14047
## elim_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6675
## GO_significant: 118
## feasible_genes: 9496
## feasible_genes_significant: 335
## genes_nodeSize: 5
## Nontrivial_nodes: 3092
## - enrichment pvalue cutoff:
## Aboral_elim : 0.01
## - enrich GOs (in at least one list): 118 GO terms of 1 conditions.
## Aboral_elim : 118 terms
## - terms distances: Wang
# display MDSplot
ViSEAGO::MDSplot(myGOs,
"GOterms")
# GOterms heatmap with the default parameters
Wang_clusters_wardD2_Aboral <- ViSEAGO::GOterms_heatmap(
myGOs,
showIC = TRUE,
showGOlabels = TRUE,
GO.tree = list(
tree = list(distance = "Wang", aggreg.method = "ward.D2"),
cut = list(
dynamic = list(
pamStage = TRUE,
pamRespectsDendro = TRUE,
deepSplit = 2,
minClusterSize = 2
)
)
),
samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Aboral,
"GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2_Aboral)
# Print the clusters-heatmap table
ViSEAGO::show_table(
Wang_clusters_wardD2_Aboral,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_Aboral_cluster_heatmap_Wang_wardD2.csv"
)
# display colored MDSplot
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Aboral,
"GOterms")
# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2_Aboral<-ViSEAGO::compute_SS_distances(
Wang_clusters_wardD2_Aboral,
distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
Wang_clusters_wardD2_Aboral,
"GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2_Aboral<-ViSEAGO::GOclusters_heatmap(
Wang_clusters_wardD2_Aboral,
tree=list(
distance="BMA",
aggreg.method="ward.D2"
)
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2_Aboral,
"GOclusters")
clustered_Oral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Oral@enrich_GOs@data)
cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Oral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>%
mutate(
cluster = str_extract(cluster_full, "\\d+"),
Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
Cluster.term = str_replace(cluster_full,"^\\d+_",""),
Cluster.term = str_replace(Cluster.term,"_.*$","")
) #%>%
#mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) #%>%
#dplyr::select(cluster, Cluster.name)
clustered_Oral_DEGs_enrichedGO <- clustered_Oral_DEGs_enrichedGO %>%
left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
mutate(Tissue="Oral")
clustered_Aboral_DEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2_Aboral@enrich_GOs@data)
cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2_Aboral,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>%
mutate(
cluster = str_extract(cluster_full, "\\d+"),
Cluster.name = str_replace(cluster_full,".*?_.*?_",""),
Cluster.term = str_replace(cluster_full,"^\\d+_",""),
Cluster.term = str_replace(Cluster.term,"_.*$","")
) #%>%
#mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) #%>%
#dplyr::select(cluster, Cluster.name)
clustered_Aboral_DEGs_enrichedGO <- clustered_Aboral_DEGs_enrichedGO %>%
left_join(cluster_map, by = c("GO.cluster" = "cluster")) %>%
mutate(Tissue="Aboral")
clustered_allDEGs_enrichedGO <- rbind(clustered_Oral_DEGs_enrichedGO %>%
dplyr::select(-contains("Oral")),
clustered_Aboral_DEGs_enrichedGO %>%
dplyr::select(-contains("Aboral")))
library(dplyr)
library(tidyr)
library(ggplot2)
# Create bidirectional data
cluster_data <- clustered_allDEGs_enrichedGO %>%
dplyr::select(Cluster.name, Tissue) %>%
group_by(Cluster.name) %>%
summarise(
Oral_terms = sum(Tissue=="Oral"),
Aboral_terms = sum(Tissue=="Aboral"),
.groups = "drop"
) %>%
mutate(
deviation = Aboral_terms - Oral_terms,
Oral_terms = -Oral_terms # Make negative for bidirectional plot
) %>%
pivot_longer(cols = c("Oral_terms", "Aboral_terms"),
names_to = "Tissue", values_to = "n_terms") %>%
mutate(Tissue = ifelse(Tissue == "Oral_terms", "Oral epidermis", "Aboral"))
# Calculate transition points for dashed lines
transition_data <- cluster_data %>%
distinct(Cluster.name, deviation) %>%
arrange(deviation)
oral_to_equal <- sum(transition_data$deviation >= 0) + 0.5
equal_to_aboral <- sum(transition_data$deviation > 0) + 0.5
# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
geom_col(width = 0.7, alpha = 0.8) +
geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
geom_vline(xintercept = c(oral_to_equal, equal_to_aboral),
color = "grey60", linetype = "dashed", linewidth = 0.6) +
scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
coord_flip() +
theme_minimal() +
theme(
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "bottom",
legend.title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
axis.text = element_text(color = "black"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12)
) +
scale_y_continuous(labels = abs) +
labs(
x = "Gene clusters",
y = "Number of enriched GO terms",
title = "Tissue-specific GO term enrichment patterns",
subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
)
ggsave("../output_RNA/differential_expression/semantic-enrichment/GO_enrichment_bidirectional_clustered_separate.png", p, width = 10, height = 8, dpi = 300)
BP_Results <- ViSEAGO::merge_enrich_terms(
Input = list(Oral_elim = c("BP_Oral", "elim_Oral"),
Aboral_elim = c("BP_Aboral", "elim_Aboral"))
)
## 'select()' returned 1:1 mapping between keys and columns
# display the merged table
BP_Results
## - object class: enrich_GO_terms
## - ontology: BP
## - method: topGO
## - summary:
## Oral_elim
## BP_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 796
## feasible_genes: 9496
## feasible_genes_significant: 687
## genes_nodeSize: 5
## nodes_number: 6675
## edges_number: 14047
## elim_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6675
## GO_significant: 60
## feasible_genes: 9496
## feasible_genes_significant: 687
## genes_nodeSize: 5
## Nontrivial_nodes: 4392
## Aboral_elim
## BP_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 379
## feasible_genes: 9496
## feasible_genes_significant: 335
## genes_nodeSize: 5
## nodes_number: 6675
## edges_number: 14047
## elim_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6675
## GO_significant: 118
## feasible_genes: 9496
## feasible_genes_significant: 335
## genes_nodeSize: 5
## Nontrivial_nodes: 3092
## - enrichment pvalue cutoff:
## Oral_elim : 0.01
## Aboral_elim : 0.01
## - enrich GOs (in at least one list): 178 GO terms of 2 conditions.
## Oral_elim : 60 terms
## Aboral_elim : 118 terms
ViSEAGO::show_table(BP_Results)
# print the merged table in a file
ViSEAGO::show_table(
BP_Results,
"../output_RNA/differential_expression/semantic-enrichment/DE_05.csv"
)
# initialize
myGOs<-ViSEAGO::build_GO_SS(
gene2GO=myGENE2GO_Pacuta,
enrich_GO_terms=BP_Results
)
## 'select()' returned 1:1 mapping between keys and columns
myGOs <- ViSEAGO::compute_SS_distances(
myGOs,
distance=c("Wang")
)
myGOs
## - object class: GO_SS
## - ontology: BP
## - method: topGO
## - summary:
## Oral_elim
## BP_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 796
## feasible_genes: 9496
## feasible_genes_significant: 687
## genes_nodeSize: 5
## nodes_number: 6675
## edges_number: 14047
## elim_Oral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6675
## GO_significant: 60
## feasible_genes: 9496
## feasible_genes_significant: 687
## genes_nodeSize: 5
## Nontrivial_nodes: 4392
## Aboral_elim
## BP_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## available_genes: 10637
## available_genes_significant: 379
## feasible_genes: 9496
## feasible_genes_significant: 335
## genes_nodeSize: 5
## nodes_number: 6675
## edges_number: 14047
## elim_Aboral
## description: Custom pacuta ../output_RNA/differential_expression/semantic-enrichment/custom_GOs.txt
## test_name: fisher p<0.01
## algorithm_name: elim
## GO_scored: 6675
## GO_significant: 118
## feasible_genes: 9496
## feasible_genes_significant: 335
## genes_nodeSize: 5
## Nontrivial_nodes: 3092
## - enrichment pvalue cutoff:
## Oral_elim : 0.01
## Aboral_elim : 0.01
## - enrich GOs (in at least one list): 178 GO terms of 2 conditions.
## Oral_elim : 60 terms
## Aboral_elim : 118 terms
## - terms distances: Wang
# display MDSplot
ViSEAGO::MDSplot(myGOs,
"GOterms")
# GOterms heatmap with the default parameters
Wang_clusters_wardD2 <- ViSEAGO::GOterms_heatmap(
myGOs,
showIC = TRUE,
showGOlabels = TRUE,
GO.tree = list(
tree = list(distance = "Wang", aggreg.method = "ward.D2"),
cut = list(
dynamic = list(
pamStage = TRUE,
pamRespectsDendro = TRUE,
deepSplit = 2,
minClusterSize = 4
)
)
),
samples.tree = NULL
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# Display the clusters-heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2,
"GOterms")
# Display the clusters-heatmap table
ViSEAGO::show_table(Wang_clusters_wardD2)
# Print the clusters-heatmap table
ViSEAGO::show_table(
Wang_clusters_wardD2,
"../output_RNA/differential_expression/semantic-enrichment/DE_05_cluster_heatmap_Wang_wardD2.csv"
)
# display colored MDSplot
ViSEAGO::MDSplot(
Wang_clusters_wardD2,
"GOterms")
# calculate semantic similarities between clusters of GO terms
Wang_clusters_wardD2<-ViSEAGO::compute_SS_distances(
Wang_clusters_wardD2,
distance=c("max", "avg","rcmax", "BMA")
)
## 'select()' returned many:1 mapping between keys and columns
# MDSplot - one point per cluster
ViSEAGO::MDSplot(
Wang_clusters_wardD2,
"GOclusters")
# GOclusters heatmap
Wang_clusters_wardD2<-ViSEAGO::GOclusters_heatmap(
Wang_clusters_wardD2,
tree=list(
distance="BMA",
aggreg.method="ward.D2"
)
)
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
# display the GOClusters heatmap
ViSEAGO::show_heatmap(
Wang_clusters_wardD2,
"GOclusters")
clustered_allDEGs_enrichedGO <- as.data.frame(Wang_clusters_wardD2@enrich_GOs@data)
cluster_map <- data.frame(cluster_full = rownames(as.matrix(slot(Wang_clusters_wardD2,"clusters_dist")[["BMA"]])))
cluster_map <- cluster_map %>%
mutate(
cluster = str_extract(cluster_full, "\\d+"),
Cluster.name = str_replace(cluster_full,".*?_.*?_","")
) %>%
mutate(Cluster.name = paste0("Cluster ", cluster, ": ", Cluster.name)) %>%
select(cluster, Cluster.name)
cluster_map
## cluster Cluster.name
## 1 1 Cluster 1: neuron development
## 2 2 Cluster 2: developmental process
## 3 3 Cluster 3: anatomical structure development
## 4 4 Cluster 4: anatomical structure development
## 5 5 Cluster 5: multicellular organism development
## 6 6 Cluster 6: sensory organ development
## 7 7 Cluster 7: system development
## 8 8 Cluster 8: animal organ development
## 9 9 Cluster 9: transport
## 10 10 Cluster 10: transport
## 11 11 Cluster 11: cell adhesion
## 12 12 Cluster 12: regulation of cell adhesion
## 13 13 Cluster 13: external encapsulating structure organization
## 14 14 Cluster 14: cell junction assembly
## 15 15 Cluster 15: biological_process
## 16 16 Cluster 16: secretion by cell
## 17 17 Cluster 17: cell-cell signaling
## 18 18 Cluster 18: signaling
## 19 19 Cluster 19: response to stimulus
## 20 20 Cluster 20: immune effector process
## 21 21 Cluster 21: biological_process
## 22 22 Cluster 22: response to external stimulus
## 23 23 Cluster 23: multicellular organismal process
## 24 24 Cluster 24: nervous system process
## 25 25 Cluster 25: metabolic process
## 26 26 Cluster 26: metabolic process
cluster_map_fixed <- cluster_map #%>%
# mutate(Cluster.name = str_replace(Cluster.name, "Cluster 2: developmental process", "Cluster 2: cell fate determination"),
# Cluster.name = str_replace(Cluster.name, "Cluster 4: anatomical structure development", "Cluster 4: regeneration & tissue morphogenesis"),
# Cluster.name = str_replace(Cluster.name, "Cluster 5: multicellular organism development", "Cluster 5: multicellular organism pattern formation"),
# Cluster.name = str_replace(Cluster.name, "Cluster 9: transport", "Cluster 9: metal ion transmembrane transport"),
# Cluster.name = str_replace(Cluster.name, "Cluster 10: transport", "Cluster 10: amino acid & small molecule transmembrane transport"),
# Cluster.name = str_replace(Cluster.name, "Cluster 12: nervous system process", "Cluster 12: sensory perception"),
# Cluster.name = str_replace(Cluster.name, "Cluster 16: biological_process", "Cluster 16: regeneration & wound healing"),
# Cluster.name = str_replace(Cluster.name, "Cluster 18: secretion by cell", "Cluster 18: neurotransmitter and hormone secretion"),
# Cluster.name = str_replace(Cluster.name, "Cluster 24: external encapsulating structure organization", "Cluster 24: extracellular matrix organization"),
# Cluster.name = str_replace(Cluster.name, "Cluster 25: biological_process", "Cluster 25: cell organization and homeostasis")
# )
clustered_allDEGs_enrichedGO <- clustered_allDEGs_enrichedGO %>%
left_join(cluster_map_fixed, by = c("GO.cluster" = "cluster"))
write.csv(clustered_allDEGs_enrichedGO,"../output_RNA/differential_expression/semantic-enrichment/DE_05_clusters_named.csv")
library(DT)
# Interactive table with scrolling
datatable(clustered_allDEGs_enrichedGO,
options = list(
pageLength = 10, # rows per page
scrollX = TRUE, # horizontal scroll if wide
scrollY = "400px" # vertical scroll
))
library(dplyr)
library(tidyr)
library(ggplot2)
# Create bidirectional data
cluster_data <- clustered_allDEGs_enrichedGO %>%
select(Cluster.name, Oral_elim.Significant_genes, Aboral_elim.Significant_genes) %>%
mutate(
# Clean cluster names
#Cluster.name = gsub("^Cluster \\d+:\\s*", "", Cluster.name),
Oral_terms = ifelse(!is.na(Oral_elim.Significant_genes), 1, 0),
Aboral_terms = ifelse(!is.na(Aboral_elim.Significant_genes), 1, 0)
) %>%
group_by(Cluster.name) %>%
summarise(
Oral_terms = sum(Oral_terms),
Aboral_terms = sum(Aboral_terms),
.groups = "drop"
) %>%
filter(Oral_terms + Aboral_terms > 0) %>%
mutate(
deviation = Aboral_terms - Oral_terms,
Oral_terms = -Oral_terms # Make negative for bidirectional plot
) %>%
pivot_longer(cols = c("Oral_terms", "Aboral_terms"),
names_to = "Tissue", values_to = "n_terms") %>%
mutate(Tissue = ifelse(Tissue == "Oral_terms", "Oral epidermis", "Aboral"))
datatable(cluster_data,
options = list(
pageLength = 10, # rows per page
scrollX = TRUE, # horizontal scroll if wide
scrollY = "400px" # vertical scroll
))
# Calculate transition points for dashed lines
transition_data <- cluster_data %>%
distinct(Cluster.name, deviation) %>%
arrange(deviation)
oral_to_equal <- sum(transition_data$deviation >= 0) + 0.5
equal_to_aboral <- sum(transition_data$deviation > 0) + 0.5
# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
geom_col(width = 0.7, alpha = 0.8) +
geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
geom_vline(xintercept = c(oral_to_equal, equal_to_aboral),
color = "grey60", linetype = "dashed", linewidth = 0.6) +
scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
coord_flip() +
theme_minimal() +
theme(
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "bottom",
legend.title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
axis.text = element_text(color = "black"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12)
) +
scale_y_continuous(labels = abs) +
labs(
x = "Gene clusters",
y = "Number of enriched GO terms",
title = "Tissue-specific GO term enrichment patterns",
subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
)
ggsave("../output_RNA/differential_expression/semantic-enrichment/GO_enrichment_bidirectional.png", p, width = 10, height = 8, dpi = 300)
print(p)
# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
geom_col(width = 0.7, alpha = 0.8) +
geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
geom_vline(xintercept = c(oral_to_equal, equal_to_aboral),
color = "grey60", linetype = "dashed", linewidth = 0.6) +
scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
coord_flip() +
theme_minimal() +
theme(
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "bottom",
legend.title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
axis.text = element_text(color = "black"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12, hjust=0)
) +
scale_y_continuous(labels = abs) +
scale_x_discrete(position = "top") +
labs(
x = "Gene clusters",
y = "Number of enriched GO terms",
title = "Tissue-specific GO term enrichment patterns",
subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
)
ggsave("../output_RNA/differential_expression/semantic-enrichment/GO_enrichment_bidirectional_rightaxis.png", p, width = 10, height = 8, dpi = 300)
print(p)
# Plot
p <- ggplot(cluster_data, aes(x = reorder(Cluster.name, -deviation), y = n_terms, fill = Tissue)) +
geom_col(width = 0.7, alpha = 0.8) +
geom_hline(yintercept = 0, color = "black", linewidth = 0.8) +
geom_vline(xintercept = c(oral_to_equal, equal_to_aboral),
color = "grey60", linetype = "dashed", linewidth = 0.6) +
scale_fill_manual(values = c("Oral epidermis" = "#FD8D3C", "Aboral" = "#756BB1")) + #green: "#2E8B57"
#coord_flip() +
theme_minimal() +
theme(
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "right",
legend.title = element_text(face = "bold"),
axis.title = element_text(face = "bold"),
axis.text = element_text(color = "black"),
axis.text.x = element_text(size = 12,angle = 65, hjust = 1),
axis.text.y = element_text(size = 12)
) +
scale_y_continuous(labels = abs) +
labs(
x = "Gene clusters",
y = "Number of enriched GO terms",
title = "Tissue-specific GO term enrichment patterns",
subtitle = "Clusters ordered by net enrichment (Oral vs. Aboral)"
)
p
ggsave("../output_RNA/differential_expression/semantic-enrichment/GO_enrichment_bidirectional_horiz.png", p, width = 12, height = 8, dpi = 300)